---
title: "Expectation_EEG_Analysis_Stats"
output: html_document
date: "2023-11-29"
---


```{r, include=FALSE}
library(base)       # The R Base Package
library(stringr)    # Simple, Consistent Wrappers for Common String Operations (filtering of partial name)
library(rstudioapi) # Safely Access the RStudio API 

library(ggplot2)    # Create Elegant Data Visualisations Using the Grammar of Graphics
library(ggpubr)     # 'ggplot2' Based Publication Ready Plots
library(ggsignif)   # Significance Brackets for 'ggplot2'
library(ggsci)      # Scientific Journal and Sci-Fi Themed Color Palettes for 'ggplot2'
library(ggrain)     # A Rainclouds Geom for 'ggplot2'


library(rstatix)    # Pipe-Friendly Framework for Basic Statistical Tests 
library(lmerTest)   # Tests in Linear Mixed Effects Models
library(stats)      # The R Stats Package
library(emmeans)    # Estimated Marginal Means, aka Least-Squares Means
library(car)        # Companion to Applied Regression
library(pbkrtest)   # Parametric Bootstrap, Kenward-Roger and Satterthwaite Based Methods for Test in Mixed Models
library(effectsize) # Indices of Effect Size
```


### load data

```{r, include=FALSE}
current_path = rstudioapi::getActiveDocumentContext()$path 
setwd(dirname(current_path )) #sets working directory to the current file path, make sure csv files are in the same folder

phaseLocked <-read.delim("phaseLocked_aggregated.csv", header=TRUE, sep=";")
alpha <-read.delim("alpha_aggregated.csv", header=TRUE, sep=";")
theta <-read.delim("theta_aggregated.csv", header=TRUE, sep=";")
beta <-read.delim("beta_aggregated.csv", header=TRUE, sep=";")
```

### sort tables

```{r}
phaseL_full <-convert_as_factor(phaseLocked, vars=  c("cue", "temperature","subject", "electrode", "condition"))
phaseL_full <-as.data.frame(filter(phaseL_full, electrode == "Fz")) #electrode with the highest test statistic in the cluster-based multi-sensor Wilcoxon signed-rank test against 0

theta_full <-convert_as_factor(theta, vars=  c("cue", "temperature","subject", "electrode", "condition"))
theta_full <-as.data.frame(filter(theta_full, electrode == "C2"))

alpha_full <-convert_as_factor(alpha, vars=  c("cue", "temperature","subject", "electrode", "condition"))
alpha_full <-as.data.frame(filter(alpha_full, electrode == "PO3"))

beta_full <-convert_as_factor(beta, vars=  c("cue","subject", "electrode", "condition"))
beta_full <-as.data.frame(filter(beta_full, electrode == "FCz"))
```

## Phase-locked Response

### Check Assumptions / Outliers
```{r}
wilcox.test(phaseL_full$amplitude)

check_data <- phaseL_full
lmm <- lmer(amplitude ~ cue *condition+ (1|subject), data=check_data)
qqnorm(resid(lmm))
qqline(resid(lmm))

check_data$cook <- cooks.distance(lmm)
plot(check_data$cook, check_data$subject)

#log transformation to make data more conform with normality (if necessary)
#if amplitudes are negative, might need +1 because log only works on positive numbers
#if amplitude very small might need to *10, log has almost no effect on small numbers
check_data$amp_log <- log(check_data$amplitude+10)
lmm_log <- lmer(amp_log ~ cue *condition+ (1| subject), data=check_data)
qqnorm(resid(lmm_log))
qqline(resid(lmm_log))

check_data$cook <- cooks.distance(lmm_log)
plot(check_data$cook, check_data$subject)

check_data$amplitude <- replace(check_data$amplitude, c(check_data$cook > 1),NA) #replaces amplitude with NA for all rows in which cook's distance is larger than 1
check_data$amp_log <- replace(check_data$amp_log, c(check_data$cook > 1),NA) #does the same for the row with log amplitudes
phaseL_full <- check_data

phaseL_full<- subset(phaseL_full, subject!="S_19") #removed subject 19 from analysis as it was identified as an outlier
```

### Linear Mixed Model
```{r}
LMM_amplitudeMod <- lmer(amp_log ~ cue *condition+  (1|subject) , data=phaseL_full, REML=TRUE)
summary(LMM_amplitudeMod)
anova(LMM_amplitudeMod, ddf="Kenward-Roger", type =3)
emmeans(LMM_amplitudeMod, list(pairwise ~ cue| condition), adjust = "tukey" ) #post-hoc pairwise comparison
emmeans(LMM_amplitudeMod, list(pairwise ~ condition| cue), adjust = "tukey" ) #post-hoc pairwise comparison

#calculate effect size based on F-value
F_to_eta2(7.7556, 1, 114) #cue
F_to_eta2( 6.2998, 1, 114) #condition
F_to_eta2(25.6586, 1, 114) #interaction

#plot
ggerrorplot(phaseL_full, x="cue", y="amplitude", facet.by = "condition", desc.stat= "mean_ci", combine = TRUE, size= 1, 
           add=c( "jitter"),add.params = list(color="black"), title= "phase-locked response (Fz)", xlab= FALSE, 
           ylab="amplitude [uV]", color="cue", palette = c("Red", "Blue"), legend="RIGHT",
          panel.labs.background=list(color="white", fill="white"), panel.labs.font=list(face="bold", size=11))+
  theme(panel.border = element_blank(), axis.line = element_line()) + 
  geom_exec(geom_line, phaseL_full, x="cue", y="amplitude",  group="subject", color="grey") 
```

#-----------------------------------------------------------------------------------------
## Theta
```{r}
LMM_amplitudeMod <- lmer(amp_log ~ cue *condition+ (1|subject), data=theta_full, REML=TRUE)
summary(LMM_amplitudeMod)
emmeans(LMM_amplitudeMod, list(pairwise ~ cue| condition), adjust = "tukey" ) #post-hoc
if(requireNamespace("pbkrtest", quietly = TRUE)) #sets settings for ANOVA and LMM the same
anova(LMM_amplitudeMod, ddf="Kenward-Roger", type =3)

```

### Check Assumptions / Outliers
```{r}
wilcox.test(theta_full$amplitude)

check_data <- theta_full
lmm <- lmer(amplitude ~ cue *condition+ (1|subject), data=check_data)
qqnorm(resid(lmm))
qqline(resid(lmm))

check_data$cook <- cooks.distance(lmm)
plot(check_data$cook, check_data$subject)

#log transformation to make data more conform with normality (if necessary)
#if amplitudes are negative, might need +1 because log only works on positive numbers
#if amplitude very small might need to *10, log has almost no effect on small numbers
check_data$amp_log <- log(check_data$amplitude*10+10)
lmm_log <- lmer(amp_log ~ cue *condition+ (1| subject), data=check_data)
qqnorm(resid(lmm_log))
qqline(resid(lmm_log))

check_data$cook <- cooks.distance(lmm_log)
plot(check_data$cook, check_data$subject)

check_data$amplitude <- replace(check_data$amplitude, c(check_data$cook > 1),NA) #replaces amplitude with NA for all rows in which cook's distance is larger than 1
check_data$amp_log <- replace(check_data$amp_log, c(check_data$cook > 1),NA)
theta_full <- check_data
```

### Linear Mixed Model and Effect Size
```{r}
LMM_amplitudeMod <- lmer(amp_log ~ cue *condition+  (1|subject) , data=theta_full, REML=TRUE)
summary(LMM_amplitudeMod)
anova(LMM_amplitudeMod, ddf="Kenward-Roger", type =3)
emmeans(LMM_amplitudeMod, list(pairwise ~ condition| cue), adjust = "tukey" ) #post-hoc
emmeans(LMM_amplitudeMod, list(pairwise ~ cue| condition), adjust = "tukey" ) #post-hoc

#Effect Size
F_to_eta2( 4.9918, 1, 117) #cue
F_to_eta2(  4.5879, 1, 117) #condition
F_to_eta2( 2.2478, 1, 117) #interaction


ggerrorplot(theta_full, x="cue", y="amplitude", facet.by = "condition", desc.stat= "mean_ci", combine = TRUE, size= 1, 
           add=c( "jitter"),add.params = list(color="black"), title= "modulation theta frequency band (C2)", xlab= FALSE, 
           ylab="amplitude [uV]", color="cue", palette = c("Red", "Blue"), legend="RIGHT",
          panel.labs.background=list(color="white", fill="white"), panel.labs.font=list(face="bold", size=11))+
  theme(panel.border = element_blank(), axis.line = element_line()) + 
  geom_exec(geom_line, theta_full, x="cue", y="amplitude",  group="subject", color="grey") 
```
  
#-----------------------------------------------------------------------------------------
## Alpha
### check assumptions / outliers
```{r}
wilcox.test(alpha_full$amplitude)

check_data <- alpha_full
lmm <- lmer(amplitude ~ cue *condition+ (1|subject), data=check_data)
qqnorm(resid(lmm))
qqline(resid(lmm))

check_data$cook <- cooks.distance(lmm)
plot(check_data$cook, check_data$subject)

#log transformation to make data more conform with normality (if necessary)
#if amplitudes are negative, might need +1 because log only works on positive numbers
#if amplitude very small might need to *10, log has almost no effect on small numbers
check_data$amp_log <- log(check_data$amplitude*10+10)
lmm_log <- lmer(amp_log ~ cue *condition+ (1| subject), data=check_data)
qqnorm(resid(lmm_log))
qqline(resid(lmm_log))

check_data$cook <- cooks.distance(lmm_log)
plot(check_data$cook, check_data$subject)

check_data$amplitude <- replace(check_data$amplitude, c(check_data$cook > 1),NA) #replaces amplitude with NA for all rows in which cook's distance is larger than 1
check_data$amp_log <- replace(check_data$amp_log, c(check_data$cook > 1),NA)
alpha_full <- check_data
```

### Linear Mixed Model
```{r}
LMM_amplitudeMod <- lmer(amp_log ~ cue *condition+  (1|subject) , data=alpha_full, REML=TRUE)
summary(LMM_amplitudeMod)
anova(LMM_amplitudeMod, ddf="Kenward-Roger", type =3)
emmeans(LMM_amplitudeMod, list(pairwise ~ condition| cue), adjust = "tukey" ) #post-hoc
emmeans(LMM_amplitudeMod, list(pairwise ~ cue| condition), adjust = "tukey" ) #post-hoc

# Effect Size
F_to_eta2( 10.7606, 1, 117) #cue
F_to_eta2( 4.4704, 1, 117) #condition
F_to_eta2( 8.8613, 1, 117) #interaction

ggerrorplot(alpha_full, x="cue", y="amplitude", facet.by = "condition", desc.stat= "mean_ci", combine = TRUE, size= 1, 
           add=c( "jitter"),add.params = list(color="black"), title= "modulation beta frequency band (PO3)", xlab= FALSE, 
           ylab="amplitude [uV]", color="cue", palette = c("Red", "Blue"), legend="RIGHT",
          panel.labs.background=list(color="white", fill="white"), panel.labs.font=list(face="bold", size=11))+
  theme(panel.border = element_blank(), axis.line = element_line()) + 
  geom_exec(geom_line, alpha_full, x="cue", y="amplitude",  group="subject", color="grey")

# raincloudplot
ggplot(alpha_full, aes(condition, amplitude, fill=cue, color=cue))+
  geom_rain(alpha=.5,rain.side = "r",
            boxplot.args = list(color = "black", outlier.shape = NA),
            boxplot.args.pos = list(position = ggpp::position_dodgenudge(x = .2), width = 0.15), 
            violin.args.pos = list( side = "r",
              width = 0.7, position = position_nudge(x = 0.3)))+
  labs(x="temperature", y="amplitude [µV]")+
  font("legend.title", face = "bold")+
  font("xy.title", size = 16)+
  font("xy.text", size = 14)+
  theme_classic() +
  scale_fill_brewer(palette = 'Set1')+
  stat_compare_means(method = "t.test", paired=TRUE, label = "p.signif",label.x.npc = "middle")

```


#-----------------------------------------------------------------------------------------

### Beta

```{r}
LMM_amplitudeMod <- lmer(amplitude ~ cue *condition+ (1|subject), data=beta_full, REML=TRUE)
summary(LMM_amplitudeMod)
emmeans(LMM_amplitudeMod, list(pairwise ~ cue| condition), adjust = "tukey" ) #post-hoc
if(requireNamespace("pbkrtest", quietly = TRUE)) #sets settings for ANOVA and LMM the same
anova(LMM_amplitudeMod, ddf="Kenward-Roger", type =3)

```

## check assumptions / outliers
```{r}
wilcox.test(beta_full$amplitude)

check_data <- beta_full
lmm <- lmer(amplitude ~ cue *condition+ (1|subject), data=check_data)
qqnorm(resid(lmm))
qqline(resid(lmm))

check_data$cook <- cooks.distance(lmm)
plot(check_data$cook, check_data$subject)

#log transformation to make data more conform with normality (if necessary)
#if amplitudes are negative, might need +1 because log only works on positive numbers
#if amplitude very small might need to *10, log has almost no effect on small numbers
check_data$amp_log <- log(check_data$amplitude*10+10)
lmm_log <- lmer(amp_log ~ cue *condition+ (1| subject), data=check_data)
qqnorm(resid(lmm_log))
qqline(resid(lmm_log))

check_data$cook <- cooks.distance(lmm_log)
plot(check_data$cook, check_data$subject)

check_data$amplitude <- replace(check_data$amplitude, c(check_data$cook > 1),NA) #replaces amplitude with NA for all rows in which cook's distance is larger than 1
check_data$amp_log <- replace(check_data$amp_log, c(check_data$cook > 1),NA)
beta_full <- check_data
```

```{r}
LMM_amplitudeMod <- lmer(amplitude ~ cue *condition+  (1|subject) , data=beta_full, REML=TRUE) #does not need log transform!
summary(LMM_amplitudeMod)
anova(LMM_amplitudeMod, ddf="Kenward-Roger", type =3)
emmeans(LMM_amplitudeMod, list(pairwise ~ condition| cue), adjust = "tukey" ) #post-hoc
emmeans(LMM_amplitudeMod, list(pairwise ~ cue| condition), adjust = "tukey" ) #post-hoc

# Effect Size
F_to_eta2(10.3737, 1, 117) #cue
F_to_eta2( 1.9212, 1, 117) #condition
F_to_eta2(13.7841, 1, 117) #interaction

ggerrorplot(beta_full, x="cue", y="amplitude", facet.by = "condition", desc.stat= "mean_ci", combine = TRUE, size= 1, 
           add=c( "jitter"),add.params = list(color="black"), title= "modulation beta frequency band (FCz)", xlab= FALSE, 
           ylab="amplitude [uV]", color="cue", palette = c("Red", "Blue"), legend="RIGHT",
          panel.labs.background=list(color="white", fill="white"), panel.labs.font=list(face="bold", size=11))+
  theme(panel.border = element_blank(), axis.line = element_line()) + 
  geom_exec(geom_line, beta_full, x="cue", y="amplitude",  group="subject", color="grey")
```

#----------------------------------------------------------------------------------------------------------------
### Plots
```{r}
plotA <- ggplot(phaseL_full, aes(condition, amplitude, fill=cue, color=cue))+
  geom_rain(alpha=.5,rain.side = "r",
            boxplot.args = list(color = "black", outlier.shape = NA),
            boxplot.args.pos = list(position = ggpp::position_dodgenudge(x = .2), width = 0.15), 
            violin.args.pos = list( side = "r",
              width = 0.7, position = position_nudge(x = 0.3)))+
      labs(x="temperature", y="amplitude [µV]")+
  font("legend.title", face = "bold")+
  font("xy.title", size = 16)+
  font("xy.text", size = 14)+
    theme_classic() +
    scale_fill_brewer(palette = 'Set1')+
  stat_compare_means(method = "t.test", paired=TRUE, label = "p.signif",label.x.npc = "middle", hide.ns = TRUE)

plotB <-  ggplot(theta_full, aes(condition, amplitude, fill=cue, color=cue))+
  geom_rain(alpha=.5,rain.side = "r",
            boxplot.args = list(color = "black", outlier.shape = NA),
            boxplot.args.pos = list(position = ggpp::position_dodgenudge(x = .2), width = 0.15), 
            violin.args.pos = list( side = "r",
              width = 0.7, position = position_nudge(x = 0.3)))+
      labs(x="temperature", y="amplitude [µV]")+
  font("legend.title", face = "bold")+
  font("xy.title", size = 16)+
  font("xy.text", size = 14)+
    theme_classic() +
    scale_fill_brewer(palette = 'Set1')+
  stat_compare_means(method = "t.test", paired=TRUE, label = "p.signif",label.x.npc = "middle", hide.ns = TRUE)
 
plotC <-   ggplot(alpha_full, aes(condition, amplitude, fill=cue, color=cue))+
  geom_rain(alpha=.5,rain.side = "r",
            boxplot.args = list(color = "black", outlier.shape = NA),
            boxplot.args.pos = list(position = ggpp::position_dodgenudge(x = .2), width = 0.15), 
            violin.args.pos = list( side = "r",
              width = 0.7, position = position_nudge(x = 0.3)))+
      labs(x="temperature", y="amplitude [µV]")+
  ylim(-0.3, 0.4)+
  font("legend.title", face = "bold")+
  font("xy.title", size = 16)+
  font("xy.text", size = 14)+
    theme_classic() +
    scale_fill_brewer(palette = 'Set1')+
  stat_compare_means(method = "t.test", paired=TRUE, label = "p.signif",label.x.npc = "middle", hide.ns = TRUE)
  
plotD <-    ggplot(beta_full, aes(condition, amplitude, fill=cue, color=cue))+
  geom_rain(alpha=.5,rain.side = "r",
            boxplot.args = list(color = "black", outlier.shape = NA),
            boxplot.args.pos = list(position = ggpp::position_dodgenudge(x = .2), width = 0.15), 
            violin.args.pos = list( side = "r",
              width = 0.7, position = position_nudge(x = 0.3)))+
      labs(x="temperature", y="amplitude [µV]")+
  ylim(-0.3, 0.4)+
  font("legend.title", face = "bold")+
  font("xy.title", size = 16)+
  font("xy.text", size = 14)+
    theme_classic() +
    scale_fill_brewer(palette = 'Set1')+
  stat_compare_means(method = "t.test", paired=TRUE, label = "p.signif",label.x.npc = "middle", hide.ns = TRUE)

ggarrange(plotA, plotB, plotC, plotD, ncol = 2, nrow = 2, common.legend = TRUE, labels = "AUTO")
```